/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.searcher;
import java.io.IOException;
import java.io.File;
import java.util.HashMap;
import net.nutch.io.*;
import net.nutch.db.*;
import net.nutch.fetcher.*;
import net.nutch.pagedb.*;
import net.nutch.indexer.*;
/** Implements {@link HitSummarizer} and {@link HitContent} for a set of
 * fetched segments.
 *
 * <p>Each segment is looked up by the value of a hit's <code>"segment"</code>
 * field, and the entry within it by the hit's <code>"docNo"</code> field,
 * which is parsed as a hexadecimal integer. */
public class FetchedSegments implements HitSummarizer, HitContent {

  /** Readers over the fetcher output, raw content and extracted text files
   * of a single segment directory. */
  private static class Segment {
    private final ArrayFile.Reader fetcher;
    private final ArrayFile.Reader content;
    private final ArrayFile.Reader text;

    /** Opens the three readers found beneath <code>segmentDir</code>.
     *
     * @param segmentDir directory of one segment
     * @throws IOException if any of the readers cannot be opened
     */
    public Segment(File segmentDir) throws IOException {
      this.fetcher = new ArrayFile.Reader
        (new File(segmentDir, FetcherOutput.DIR_NAME).toString());
      this.content = new ArrayFile.Reader
        (new File(segmentDir, FetcherContent.DIR_NAME).toString());
      this.text = new ArrayFile.Reader
        (new File(segmentDir, FetcherText.DIR_NAME).toString());
    }

    /** Reads the fetcher output entry stored at <code>docNo</code>. */
    public FetcherOutput getFetcherOutput(int docNo) throws IOException {
      FetcherOutput entry = new FetcherOutput();
      fetcher.get(docNo, entry);
      return entry;
    }

    /** Reads the content entry stored at <code>docNo</code> and returns its
     * bytes. */
    public byte[] getContent(int docNo) throws IOException {
      FetcherContent entry = new FetcherContent();
      content.get(docNo, entry);
      return entry.getContent();
    }

    /** Reads the text entry stored at <code>docNo</code> and returns its
     * text. */
    public String getText(int docNo) throws IOException {
      FetcherText entry = new FetcherText();
      text.get(docNo, entry);
      return entry.getText();
    }
  }

  /** Maps segment directory name (String) to its open {@link Segment}. */
  private final HashMap segments = new HashMap();

  /** Construct given a directory containing fetcher output.
   *
   * <p>Only sub-directories that contain a regular file named
   * {@link IndexSegment#DONE_NAME} are opened; all others are skipped. If
   * <code>segmentsDir</code> cannot be listed, no segments are opened.
   *
   * @param segmentsDir path of the directory holding the segment directories
   * @throws IOException if an accepted segment cannot be opened
   */
  public FetchedSegments(String segmentsDir) throws IOException {
    File[] segmentDirs = new File(segmentsDir).listFiles();
    if (segmentDirs == null) {
      // listFiles() returns null when the path is not a listable directory.
      return;
    }
    for (int i = 0; i < segmentDirs.length; i++) {
      File segmentDir = segmentDirs[i];
      File indexDone = new File(segmentDir, IndexSegment.DONE_NAME);
      if (indexDone.exists() && indexDone.isFile()) {
        segments.put(segmentDir.getName(), new Segment(segmentDir));
      }
    }
  }

  /** Returns the names of all segments opened by the constructor. */
  public String[] getSegmentNames() {
    return (String[])segments.keySet().toArray(new String[segments.size()]);
  }

  /** Returns the stored content bytes for a hit.
   *
   * @throws IOException if the hit's segment is not open or reading fails
   */
  public byte[] getContent(HitDetails details) throws IOException {
    return getSegment(details).getContent(getDocNo(details));
  }

  /** Returns the anchors of the fetch list entry recorded for a hit.
   *
   * @throws IOException if the hit's segment is not open or reading fails
   */
  public String[] getAnchors(HitDetails details) throws IOException {
    return getSegment(details).getFetcherOutput(getDocNo(details))
      .getFetchListEntry().getAnchors();
  }

  /** Returns a summary of a hit's stored text for the given query, produced
   * by a new {@link Summarizer}.
   *
   * @throws IOException if the hit's segment is not open or reading fails
   */
  public String getSummary(HitDetails details, Query query)
    throws IOException {
    String text = getSegment(details).getText(getDocNo(details));
    return new Summarizer().getSummary(text, query).toString();
  }

  /** Returns one summary per hit, in the same order as <code>details</code>.
   *
   * @throws IOException if any hit's segment is not open or reading fails
   */
  public String[] getSummary(HitDetails[] details, Query query)
    throws IOException {
    String[] results = new String[details.length];
    for (int i = 0; i < details.length; i++) {
      results[i] = getSummary(details[i], query);
    }
    return results;
  }

  /** Looks up the open segment named by the hit's <code>"segment"</code>
   * field.
   *
   * @throws IOException if no segment with that name was opened, rather than
   *         returning null and failing later with a NullPointerException
   */
  private Segment getSegment(HitDetails details) throws IOException {
    String name = details.getValue("segment");
    Segment segment = (Segment)segments.get(name);
    if (segment == null) {
      throw new IOException("Unknown segment: " + name);
    }
    return segment;
  }

  /** Parses the hit's <code>"docNo"</code> field as a base-16 integer. */
  private int getDocNo(HitDetails details) {
    return Integer.parseInt(details.getValue("docNo"), 16);
  }
}